########################################
## load packages
########################################
library(ggplot2)
library(dotwhisker)
library(tidyverse)
library(dplyr)
require(stargazer)
library("readxl")
library(ggeffects)
library(ggplot2)

select <- dplyr::select
filter <- dplyr::filter

## load data
df <- readRDS("replication.RDS")

#################################
## Table 1
## summary statistics
#################################

formular1 <- selfemployed ~	mc1d + opposition + committee + ele_system + num_term + sex 
formular2 <- outsider2 ~	mc1d + opposition + committee + ele_system + num_term + sex

df.summary <- df %>% select(selfemployed, outsider2, mc1d,
                            opposition, committee, ele_system, num_term, sex)
summary_table <- df.summary %>%
  summarize(across(everything(), list(
    Mean = ~mean(., na.rm = TRUE),
    Median = ~median(., na.rm = TRUE),
    SD = ~sd(., na.rm = TRUE),
    Min = ~min(., na.rm = TRUE),
    Max = ~max(., na.rm = TRUE),
    Sum = ~sum(., na.rm = TRUE)
  ))) %>%
  pivot_longer(everything(), 
               names_to = c("Variable", "Statistic"), 
               names_pattern = "(.*)_(.*)",
               values_to = "Value") %>%
  pivot_wider(names_from = Statistic, values_from = Value)

# Print the table
print(summary_table, n = Inf)
write.csv(summary_table, file="Table1.csv")

############################
## Table 3
############################
formular1 <- selfemployed ~	num_term + sex + ele_system + mc1d
formular2 <- selfemployed ~	mc1d + opposition + committee + ele_system + num_term + sex 
formular3 <- outsider2 ~	num_term + sex + ele_system + mc1d
formular4 <- outsider2 ~	mc1d + opposition + committee + ele_system + num_term + sex

m1 <- glm(formular1, df, family = binomial(link = "probit"))
m2 <- glm(formular2, df, family = binomial(link = "probit"))
m3 <- glm(formular3, df, family = binomial(link = "probit"))
m4 <- glm(formular4, df, family = binomial(link = "probit"))

stargazer(m1, m2, m3, m4, type = "html",
          out="Table3.doc",
           intercept.bottom = TRUE,  title="Results", align=TRUE)

###########################
## Table 4
###########################

## Excluding "한정애" & "임이자"
df.sub = subset(df, name !="한정애" & name != "임이자")
m5 <- glm(formular2, df.sub, family = binomial(link = "probit"))
m6 <- glm(formular4, df.sub, family = binomial(link = "probit"))


## a lagged variable (lagged_bill)
df <- df %>%
  arrange(date) %>% 
  group_by(committee) %>% 
  mutate(lagged_selfemployed_bill = lag(selfemployed, default = 0)) %>%  
  mutate(lagged_outsider2_bill = lag(outsider2, default = 0)) %>%  
    ungroup()

formular7<- selfemployed ~ mc1d + opposition + committee + ele_system + num_term + sex + lagged_selfemployed_bill
formular8 <- outsider2 ~	mc1d + opposition + committee + ele_system + num_term + sex + lagged_outsider2_bill

m7 <- glm(formular7, df, family = binomial(link = "probit"))
m8 <- glm(formular8, df, family = binomial(link = "probit"))


## Legislative History 
df <- df %>%
  arrange(date) %>%  
  group_by(`제안자`) %>%  
  mutate(history_selfemployed_bill = cumsum(selfemployed)) %>%  
  mutate(history_outsider2_bill = cumsum(outsider2)) %>% 
  ungroup()

# Update models with the history variable
formular9 <- selfemployed ~ mc1d + opposition + committee + ele_system + num_term + sex + history_selfemployed_bill
formular10 <- outsider2 ~	mc1d + opposition + committee + ele_system + num_term + sex + history_outsider2_bill

m9 <- glm(formular9, df, family = binomial(link = "probit"))
m10 <- glm(formular10, df, family = binomial(link = "probit"))


stargazer(m5, m6, m7, m8, m9, m10, type = "html",
          dep.var.labels = c("Bills"),
          out="Table4.doc",
          intercept.bottom = TRUE, title="Robustness Checks", align=TRUE)

###########################
## Visualize Marginal Effects of Ideology (mc1d)
###########################
pred_m2 <- ggpredict(m2, terms = "mc1d [all]") 

plot_m2 <- ggplot(pred_m2, aes(x = x, y = predicted)) +
  geom_line(color = "blue", size = 1) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2, fill = "blue") +
  labs(title = "Marginal Probability: Ideology vs Self-Employed Bills",
       x = "Partisan Ideology (mc1d)",
       y = "Predicted Probability of Proposing a Bill") +
  theme_minimal()

pred_m4 <- ggpredict(m4, terms = "mc1d [all]")  

plot_m4 <- ggplot(pred_m4, aes(x = x, y = predicted)) +
  geom_line(color = "red", size = 1) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2, fill = "red") +
  labs(title = "Marginal Probability: Ideology vs Outsider Bills",
       x = "Partisan Ideology (mc1d)",
       y = "Predicted Probability of Proposing a Bill") +
  theme_minimal()


ggsave("Marginal_Probability_SelfEmployed.png",
       plot = plot_m2,  
       bg = "white",    
       width = 8,       
       height = 6,      
       dpi = 100)       

ggsave("Marginal_Probability_Outsiders.png",
       plot = plot_m4,  
       bg = "white",    
       width = 8,      
       height = 6,      
       dpi = 100)       

